#include <math.h>
#include "cell.h"
#include "coul_msm.h"
#include "nonbonded.h"
INLINE void vdw_switched(real r2, real inv_r2, real sig, real eps, real rsw2, real rcut2, real inv_denom_switch, real *ret_w, real *ret_dwdr) {
  real sig_r2 = sig * sig * inv_r2;
  real sig_r6 = sig_r2 * sig_r2 * sig_r2;
  real sig_r12 = sig_r6 * sig_r6;
  real w = (sig_r12 - 2 * sig_r6) * eps;
  real dwdr = 12 * (sig_r6 - sig_r12) * eps * inv_r2;

  if (r2 > rsw2) {
    real s, dsdr;
    s = (rcut2 - r2) * (rcut2 - r2) * (rcut2 + 2 * r2 - 3 * rsw2) * inv_denom_switch;
    dsdr = 12 * (rcut2 - r2) * (rsw2 - r2) * inv_denom_switch;
    dwdr = w * dsdr + dwdr * s;
    w = w * s;
  }
  *ret_w = w;
  *ret_dwdr = -dwdr;
}

INLINE void coul_shifted(real r2, real inv_r2, real r, real inv_r, real kqq, real inv_rcut2, real *ret_u, real *ret_dudr) {
  real u = kqq * inv_r;
  real dudr = -u * inv_r2;
  real s = (1.0 - r2 * inv_rcut2) * (1.0 - r2 * inv_rcut2);
  real dsdr = -4.0 * inv_rcut2 * (1.0 - r2 * inv_rcut2);
  *ret_u = u * s;
  *ret_dudr = u * dsdr + dudr * s;
}
INLINE void coul_original(real inv_r2, real inv_r, real kqq, real *ret_u, real *ret_dudr) {
  *ret_u = kqq * inv_r;
  *ret_dudr = (*ret_u) * inv_r2;
}
INLINE real dist2_atom_box(vec<real> * p, vec<real> * o, vec<real> * h) {
  vec<real> vp, v, up, u;
  vecsubv(vp, *p, *o);
  vecabs(v, vp);
  vecsubv(up, v, *h);
  vecsat(u, up);
  return vecnorm2(u);
}
INLINE real cell2box(celldata_t *cell, vec<real> * o, vec<real> * h) {
  vec<real> xmin = {0, 0, 0}, xmax = {0, 0, 0};
  for (int i = 0; i < cell->natom; i++) {
    vecminv(xmin, xmin, cell->x[i]);
    vecmaxv(xmax, xmax, cell->x[i]);
  }
  vecscaleaddv(*o, xmin, xmax, 0.5, 0.5);
  vecsubv(*h, xmax, *o);
}
#ifdef __sw__
extern "C"{
void nonbonded_force_lj_shifted_sw(cellgrid_t *grid, nonbonded_param_t *p, mdstat_t *stat);
void nonbonded_force_lj_rf_sw(cellgrid_t *grid, nonbonded_param_t *p, mdstat_t *stat);
void nonbonded_force_lj_msm_sw(cellgrid_t *grid, nonbonded_param_t *p, mdstat_t *stat);
}
#endif
void nonbonded_force_lj_shifted(cellgrid_t *grid, nonbonded_param_t *p, mdstat_t *stat) {
  #ifdef __sw__
  nonbonded_force_lj_shifted_sw(grid, p, stat);
  return;
  #endif
#include "nonbonded_prolog_filter.h"
  real inv_r2 = 1 / r2;
  real inv_r = sqrt(inv_r2);
  real r = sqrt(r2);
  real kqq = coul_const * icell->q[i] * jcell->q[j];
  real dwdr, dudr, w, u;
  real eps, sig;
  if (scaled) {
    sig = sigi14[i] + sigj14;
    eps = epsi14[i] * epsj14;
  } else {
    sig = sigi[i] + sigj;
    eps = epsi[i] * epsj;
  }
  real sig_r2 = sig * sig * inv_r2;
  real sig_r6 = sig_r2 * sig_r2 * sig_r2;
  real sig_r12 = sig_r6 * sig_r6;
  w = (sig_r12 - 2 * sig_r6) * eps;
  dwdr = 12 * (sig_r6 - sig_r12) * eps * inv_r2;

  if (r2 > rsw2) {
    real s, dsdr;
    s = (rcut2 - r2) * (rcut2 - r2) * (rcut2 + 2 * r2 - 3 * rsw2) * inv_denom_switch;
    dsdr = 12 * (rcut2 - r2) * (rsw2 - r2) * inv_denom_switch;
    dwdr = w * dsdr + dwdr * s;
    w = w * s;
  }
  dwdr = -dwdr;

  u = kqq * inv_r;
  dudr = -u * inv_r2;
  real s = (1.0 - r2 * inv_rcut2) * (1.0 - r2 * inv_rcut2);
  real dsdr = -4.0 * inv_rcut2 * (1.0 - r2 * inv_rcut2);
  dudr = -(u * dsdr + dudr * s);
  u = u * s;
#include "nonbonded_epilog_filter.h"
}

void nonbonded_force_lj_msm(cellgrid_t *grid, nonbonded_param_t *p, mdstat_t *stat) {
  #ifdef __sw__
  nonbonded_force_lj_msm_sw(grid, p, stat);
  return;
  #endif
#define NO_SKIP_EXCL
#include "nonbonded_prolog_filter.h"
#undef NO_SKIP_EXCL
  real inv_r2 = 1 / r2;
  real inv_r = sqrt(inv_r2);
  real r = sqrt(r2);
  real kqq = coul_const * icell->q[i] * jcell->q[j];
  real dwdr = 0, dudr = 0, w = 0, u = 0;
  if (!excluded) {
    real eps, sig;
    if (scaled) {
      sig = sigi14[i] + sigj14;
      eps = epsi14[i] * epsj14;
    } else {
      sig = sigi[i] + sigj;
      eps = epsi[i] * epsj;
    }
    real sig_r2 = sig * sig * inv_r2;
    real sig_r6 = sig_r2 * sig_r2 * sig_r2;
    real sig_r12 = sig_r6 * sig_r6;
    w = (sig_r12 - 2 * sig_r6) * eps;
    dwdr = 12 * (sig_r6 - sig_r12) * eps * inv_r2;

    if (r2 > rsw2) {
      real s, dsdr;
      s = (rcut2 - r2) * (rcut2 - r2) * (rcut2 + 2 * r2 - 3 * rsw2) * inv_denom_switch;
      dsdr = 12 * (rcut2 - r2) * (rsw2 - r2) * inv_denom_switch;
      dwdr = w * dsdr + dwdr * s;
      w = w * s;
    }
    dwdr = -dwdr;
  }
  real egamma = 1. - (r * inv_rcut) * msm_gamma(r * inv_rcut, order);
  real fgamma = 1. + (r2 * inv_rcut2) * msm_dgamma(r * inv_rcut, order);
  dudr = kqq * inv_r * fgamma * inv_r2;
  u = kqq * inv_r * egamma;
  if (excluded || scaled) {
    real u_excl = kqq * inv_r;
    real dudr_excl = u_excl * inv_r2;
    if (scaled) {
      u_excl *= (1 - scale14);
      dudr_excl *= (1 - scale14);
    }
    u -= u_excl;
    dudr -= dudr_excl;
  }
#include "nonbonded_epilog_filter.h"
}
#ifdef __sw__
//#include "sw/nonbonded_sw.h"
#endif
void nonbonded_force_lj_rf(cellgrid_t *grid, nonbonded_param_t *p, mdstat_t *stat) {
  #ifdef __sw__
  nonbonded_force_lj_rf_sw(grid, p, stat);
  return;
  #endif
  
  real eps_rf = 78.5;
  real eps_r = 1;
  real k_rf = (eps_rf - eps_r) / ((2 * eps_rf + eps_r) * p->rcut * p->rcut * p->rcut);
  real c_rf = (3 * eps_rf) / ((2 * eps_rf + eps_r) * p->rcut);
#include "nonbonded_prolog_filter.h"
  real inv_r2 = 1 / r2;
  real inv_r = sqrt(inv_r2);
  real r = sqrt(r2);
  real kqq = coul_const * icell->q[i] * jcell->q[j];
  real dwdr, dudr, w, u;
  real eps, sig;
  //printf("%d\n", scaled);
  if (scaled) {
    sig = sigj14 + sigi14[i];
    eps = epsj14 * epsi14[i];
  } else {
    sig = sigj + sigi[i];
    eps = epsj * epsi[i];
  }
  real sig_r2 = sig * sig * inv_r2;
  real sig_r6 = sig_r2 * sig_r2 * sig_r2;
  real sig_r12 = sig_r6 * sig_r6;
  w = (sig_r12 - 2 * sig_r6) * eps;
  dwdr = 12 * (sig_r6 - sig_r12) * eps * inv_r2;

  if (r2 > rsw2) {
    real s, dsdr;
    s = (rcut2 - r2) * (rcut2 - r2) * (rcut2 + 2 * r2 - 3 * rsw2) * inv_denom_switch;
    dsdr = 12 * (rcut2 - r2) * (rsw2 - r2) * inv_denom_switch;
    dwdr = w * dsdr + dwdr * s;
    w = w * s;
  }
  dwdr = -dwdr;
  u = kqq * (inv_r + k_rf * r2 - c_rf);
  dudr = kqq * (inv_r - 2.0 * k_rf * r2) * inv_r2;
  // if (w > 10000) {
    // printf("found large w=%f!\n", w);
  // }
#include "nonbonded_epilog_filter.h"
  // printf("%f %f %f %f\n", evdwl, stat->evdwl, ecoul, stat->ecoul);
  {
    celldata_t *c0 = get_cell_xyz(grid, 0, 0, 0);
    // printf("%f %f %f\n", c0->f[0].x, c0->f[0].y, c0->f[0].z);
  }
}
